Occupancy modelling with unmarked

Course 3 Occupancy models

Preparations

Load libraries

library(unmarked) #main package 
library(terra)    #needed for prediction maps

Set workspace, data directories

### The workspace  (repetition) 
getwd() # you can also use the package 'here()'
## [1] "C:/Users/sollmann/Documents/D6/Kurs Steph 2026/d6_teaching_collection-main/d6_teaching_collection-main/R/Course3_b_presence_absence_occupancy"
root_wd <- here::here() 
# relative to work-wd

##detection data
data_wd <- paste(root_wd,"/","data/data_berlin/animal_data",sep='') 

## covariate maps
maps_wd <- paste(root_wd,"/","data/data_berlin/geo_raster_current_gtif",sep='')

Load, explore the detection data

## read in detection data - example: raccoon
y<-readRDS(paste0(data_wd, '/raccoon_spring2019.RDS'))

## Basic data summaries:

##how many locations sampled?
J=nrow(y)
##how many repeat samples?
K=ncol(y)

##naive occupancy (number of sites with detections)
sum(apply(y,1,sum)>0)/nrow(y)
## [1] 0.4615385
##number of detections in each sampling occasion
apply(y,2,sum)
## occ1 occ2 occ3 occ4 
##   35   34   36   25

Load, explore covariate data

## read in covariate data
covs<-readRDS(paste0(data_wd, '/covenv_spring2019.RDS'))

## look at covariates
head(covs)
##   User_uid Local_tree_cover     pop_100 dist_water_100 imperv_100 noise_100
## 2      381               40 102.7272034       647.3228   81.60653  52.22244
## 3      382                5  43.6494637      1107.4154   65.60398  61.72527
## 4      383               20 139.3569641       519.9907   70.26485  49.98725
## 5      389               33  39.5562325       125.3740   51.60028  58.01681
## 6      390               20 111.5275955       134.6375   49.80275  56.28348
## 7      391                1   0.1371695       168.7975   23.21507  35.46156
##   tree_cover_100 lat_round lon_round
## 2       4.307139     52.61     13.43
## 3      29.130781     52.54     13.13
## 4      31.433018     52.61     13.42
## 5      39.235077     52.58     13.42
## 6      21.789064     52.58     13.42
## 7      26.736300     52.56     13.16
## we will create an observation covariate (varies across visits)
## to test whether detection probability increases or decreases
## over time (maybe animals are afraid of camera traps = novel
## objects initially, but gradually get used to them)

time<-matrix((0:3), nrow=J, ncol=K, byrow=T)
head(time)
##      [,1] [,2] [,3] [,4]
## [1,]    0    1    2    3
## [2,]    0    1    2    3
## [3,]    0    1    2    3
## [4,]    0    1    2    3
## [5,]    0    1    2    3
## [6,]    0    1    2    3
##convert to dataframe
time.df<-data.frame(time)
colnames(time.df)<-paste0('time.', 1:4)

Format data for analysis

## create unmarkedFrame

umf<-unmarkedFrameOccu(y=y, #matrix
                       siteCovs = covs, #dataframe
                       obsCovs = list(time=time.df)) #list of dataframes
summary(umf)
## unmarkedFrame Object
## 
## 143 sites
## Maximum number of observations per site: 4 
## Mean number of observations per site: 4 
## Sites with at least one detection: 66 
## 
## Tabulation of y observations:
##   0   1 
## 442 130 
## 
## Site-level covariates:
##     User_uid     Local_tree_cover    pop_100       dist_water_100   
##  Min.   :381.0   Min.   :  0.00   Min.   :  0.00   Min.   :  20.45  
##  1st Qu.:458.0   1st Qu.: 10.00   1st Qu.: 17.36   1st Qu.: 324.83  
##  Median :508.0   Median : 15.00   Median : 34.90   Median : 548.85  
##  Mean   :507.0   Mean   : 18.18   Mean   : 50.80   Mean   : 708.02  
##  3rd Qu.:563.5   3rd Qu.: 20.00   3rd Qu.: 60.35   3rd Qu.: 959.96  
##  Max.   :619.0   Max.   :100.00   Max.   :415.41   Max.   :2124.07  
##    imperv_100        noise_100     tree_cover_100    lat_round    
##  Min.   : 0.2231   Min.   :35.03   Min.   : 0.00   Min.   :52.40  
##  1st Qu.:39.4594   1st Qu.:51.60   1st Qu.:23.22   1st Qu.:52.45  
##  Median :55.4379   Median :54.79   Median :33.40   Median :52.49  
##  Mean   :51.9747   Mean   :55.60   Mean   :34.34   Mean   :52.50  
##  3rd Qu.:64.4549   3rd Qu.:60.80   3rd Qu.:47.67   3rd Qu.:52.56  
##  Max.   :99.0705   Max.   :74.72   Max.   :72.62   Max.   :52.65  
##    lon_round    
##  Min.   :13.12  
##  1st Qu.:13.32  
##  Median :13.43  
##  Mean   :13.42  
##  3rd Qu.:13.53  
##  Max.   :13.71  
## 
## Observation-level covariates:
##       time     
##  Min.   :0.00  
##  1st Qu.:0.75  
##  Median :1.50  
##  Mean   :1.50  
##  3rd Qu.:2.25  
##  Max.   :3.00

Occupancy modeling

Run simplest occpancy model - no covariates

##start with null model (no covariates)
m0<-occu(~1~1, data=umf)
summary(m0)
## 
## Call:
## occu(formula = ~1 ~ 1, data = umf)
## 
## Occupancy (logit-scale):
##  Estimate    SE     z P(>|z|)
##    0.0381 0.192 0.198   0.843
## 
## Detection (logit-scale):
##  Estimate    SE    z P(>|z|)
##    -0.217 0.144 -1.5   0.134
## 
## AIC: 556.5455 
## Number of sites: 143
##manual backtransformation - occupancy
exp(0.0381)/(1+exp(0.0381))
## [1] 0.5095238
plogis(0.0381)
## [1] 0.5095238
##compare to naive occu

## detection
plogis(-0.217)
## [1] 0.4459619
## better: use predict() function
head(predict(m0, type='state')) # state = occupancy
##   Predicted         SE     lower    upper
## 1 0.5095135 0.04800408 0.4161927 0.602176
## 2 0.5095135 0.04800408 0.4161927 0.602176
## 3 0.5095135 0.04800408 0.4161927 0.602176
## 4 0.5095135 0.04800408 0.4161927 0.602176
## 5 0.5095135 0.04800408 0.4161927 0.602176
## 6 0.5095135 0.04800408 0.4161927 0.602176
head(predict(m0, type='det')) # det = detection
##   Predicted         SE     lower     upper
## 1 0.4460575 0.03570078 0.3775881 0.5166368
## 2 0.4460575 0.03570078 0.3775881 0.5166368
## 3 0.4460575 0.03570078 0.3775881 0.5166368
## 4 0.4460575 0.03570078 0.3775881 0.5166368
## 5 0.4460575 0.03570078 0.3775881 0.5166368
## 6 0.4460575 0.03570078 0.3775881 0.5166368
##produces a value for each sampling location (and occasion)
##more useful for models with covariates

Run a model with an occupancy covariate

## run model with tree cover 100 on occupancy
m.tree100<-occu(~1~tree_cover_100, data=umf)
summary(m.tree100)
## 
## Call:
## occu(formula = ~1 ~ tree_cover_100, data = umf)
## 
## Occupancy (logit-scale):
##                Estimate     SE     z P(>|z|)
## (Intercept)     -0.8450 0.4385 -1.93  0.0540
## tree_cover_100   0.0258 0.0118  2.18  0.0291
## 
## Detection (logit-scale):
##  Estimate    SE     z P(>|z|)
##    -0.219 0.145 -1.51   0.131
## 
## AIC: 553.4082 
## Number of sites: 143
## now, predict will calculate occu probability for each location
## based on its tree cover value
head(predict(m.tree100, type='state')) # state = occupancy
##   Predicted         SE     lower     upper
## 1 0.3243848 0.08628395 0.1816305 0.5094871
## 2 0.4769126 0.05066007 0.3797919 0.5758111
## 3 0.4917655 0.04963342 0.3959987 0.5881405
## 4 0.5420535 0.05229126 0.4392358 0.6414100
## 5 0.4299448 0.05846560 0.3209048 0.5462334
## 6 0.4615081 0.05253579 0.3615596 0.5646500
##we can use this to plot occupancy probability as a function
## of tree cover
pred.tree<-predict(m.tree100, type='state')

plot(covs$tree_cover_100, pred.tree$Predicted)

##IMPORTANT: this simple approach only works for models with
##           only one covariate

Run a model with multiple covariates on occupancy

## let's look at an example with two covariates: 
## tree cover at the local and the 100-m scale

m.tree2<-occu(~1~tree_cover_100+Local_tree_cover, data=umf)

pred.tree2<-predict(m.tree2, type='state')
plot(covs$tree_cover_100, pred.tree2$Predicted)

##this happens because predict considers both variables when
## calculating the occupancy probability at each location
## but we can only plot against one covariate

## using predict when there is more than one covariate
## create a data frame with new predictor values 
## create a range of values for the predictor you want to look at
## keep a fixed value (0 or the mean) for the other predictor(s)
## NOTE: ALL predictors that are in the model must be in the new
##       data frame; the column names must be identical to those 
##       in the data
new.cov<-data.frame(tree_cover_100 = seq(0,100,1),#0-100
                    Local_tree_cover = mean(covs$Local_tree_cover))

pred.tree3<-predict(m.tree2,newdata=new.cov, type='state')
plot(new.cov$tree_cover_100, pred.tree3$Predicted)

Run a model with a detection covariate

## now let's check for an effect of time on detection
m.time<-occu(~time~1, data=umf)
summary(m.time)
## 
## Call:
## occu(formula = ~time ~ 1, data = umf)
## 
## Occupancy (logit-scale):
##  Estimate    SE     z P(>|z|)
##    0.0342 0.192 0.179   0.858
## 
## Detection (logit-scale):
##             Estimate    SE       z P(>|z|)
## (Intercept)   0.0198 0.217  0.0913   0.927
## time         -0.1564 0.106 -1.4714   0.141
## 
## AIC: 556.3619 
## Number of sites: 143
pred.time<-predict(m.time, type='det')

plot(as.vector(t(time)), pred.time$Predicted)

## or using new data
new.timecov<-data.frame(time=0:3)
pred.time2<-predict(m.time,newdata=new.timecov, type='det')
plot(new.timecov$time, pred.time2$Predicted)

Model selection via AIC

##fit multiple models: here with and without time, 
## with and without tree cover
## (top three are repeats from above)
m0<-occu(~1~1, data=umf)
m.tree100<-occu(~1~tree_cover_100, data=umf)
m.time<-occu(~time~1, data=umf)
m.tree.time<-occu(~time~tree_cover_100, data=umf)

##which of these is the best model?
##create a named fitlist
fl<-fitList(null=m0,
            tree=m.tree100,
            time=m.time,
            treeTime=m.tree.time)
##perform AIC based model selection
modSel(fl)
##          nPars    AIC delta AICwt cumltvWt
## treeTime     4 553.23  0.00 0.433     0.43
## tree         3 553.41  0.18 0.395     0.83
## time         3 556.36  3.14 0.090     0.92
## null         2 556.55  3.32 0.082     1.00
## treeTime and tree are very similar 
## and both are better than the two models without tree cover

Prediction maps

Read in treecover raster

## Read in raster of tree cover at 100-m scale
tree.berlin<-rast(paste0(maps_wd,'/tree_cover_density_2012_100m_3035.tif'))
## Look at raster
plot(tree.berlin)

Format treecover raster data for predictions

## make dataframe with treecover values for predictions
## select only raster cells that have a value (ie, that are not NA)
new.tree<-data.frame(tree_cover_100=values(tree.berlin)[!is.na(values(tree.berlin))]/100)

## make predictions from treecover model
pred.berlin<-predict(m.tree100, newdata=new.tree, type='state')

Create prediction raster (map)

## create duplicate of treecover data (this maintains resolution, extent etc)
raccoon.occu<-tree.berlin

## replace treecover values with predictions of raccoon occupancy
## again, only for non-NA values
values(raccoon.occu)[!is.na(values(tree.berlin))]<-pred.berlin$Predicted

## plot occupancy raster
plot(raccoon.occu)




Session Info
Sys.time()
## [1] "2026-02-02 11:43:55 CET"
#git2r::repository() ## uncomment if you are using GitHub
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=German_Germany.utf8  LC_CTYPE=German_Germany.utf8   
## [3] LC_MONETARY=German_Germany.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=C                      
## 
## time zone: Europe/Berlin
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] terra_1.8-70   unmarked_1.5.1
## 
## loaded via a namespace (and not attached):
##  [1] cli_3.6.5         knitr_1.50        rlang_1.1.6       xfun_0.54        
##  [5] reformulas_0.4.1  textshaping_1.0.3 jsonlite_2.0.0    rprojroot_2.1.1  
##  [9] htmltools_0.5.9   ragg_1.5.0        sass_0.4.10       rmarkdown_2.30   
## [13] grid_4.5.1        evaluate_1.0.5    jquerylib_0.1.4   MASS_7.3-65      
## [17] rmdformats_1.0.4  fastmap_1.2.0     yaml_2.3.11       lifecycle_1.0.4  
## [21] bookdown_0.46     compiler_4.5.1    codetools_0.2-20  Rcpp_1.1.0       
## [25] here_1.0.2        rstudioapi_0.17.1 systemfonts_1.3.0 lattice_0.22-7   
## [29] digest_0.6.39     R6_2.6.1          Rdpack_2.6.4      parallel_4.5.1   
## [33] rbibutils_2.3     Matrix_1.7-3      bslib_0.9.0       tools_4.5.1      
## [37] cachem_1.1.0